library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(patchwork)
library("leaps")
library(faraway)
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:faraway':
## 
##     melanoma
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(broom)

Data loading

cancer_reg = read_csv("./data/Cancer_Registry.csv") %>%
  janitor::clean_names() %>%
  dplyr::select(target_death_rate, everything()) %>%
  separate(geography, into = c("county", "state"), sep = ",")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   avgDeathsPerYear = col_integer(),
##   medIncome = col_integer(),
##   popEst2015 = col_integer(),
##   binnedInc = col_character(),
##   Geography = col_character()
## )
## See spec(...) for full column specifications.

There are in total 35 variables and 3047observations in the dataset.

Our outcome of interest is target_death_rate

dealing with missing data

#missing data

#colSums(is.na(cancer_reg))

##pct_some_col18_24 has 2285 NAs, pct_employed_coverage_alone has 609 NA, pct_employed16_over has 152 NAs


missing_value = sapply(cancer_reg[1:34], function(x) sum(length(which(is.na(x)))))


# Percentage of missing value
percentage_missing = sapply(cancer_reg[1:34], function(x) sum(length(which(is.na(x)))) / nrow(cancer_reg))
percentage_missing %>% data.frame()
##                                     .
## target_death_rate          0.00000000
## avg_ann_count              0.00000000
## avg_deaths_per_year        0.00000000
## incidence_rate             0.00000000
## med_income                 0.00000000
## pop_est2015                0.00000000
## poverty_percent            0.00000000
## study_per_cap              0.00000000
## binned_inc                 0.00000000
## median_age                 0.00000000
## median_age_male            0.00000000
## median_age_female          0.00000000
## county                     0.00000000
## state                      0.00000000
## avg_household_size         0.00000000
## percent_married            0.00000000
## pct_no_hs18_24             0.00000000
## pct_hs18_24                0.00000000
## pct_some_col18_24          0.74991795
## pct_bach_deg18_24          0.00000000
## pct_hs25_over              0.00000000
## pct_bach_deg25_over        0.00000000
## pct_employed16_over        0.04988513
## pct_unemployed16_over      0.00000000
## pct_private_coverage       0.00000000
## pct_private_coverage_alone 0.19986872
## pct_emp_priv_coverage      0.00000000
## pct_public_coverage        0.00000000
## pct_public_coverage_alone  0.00000000
## pct_white                  0.00000000
## pct_black                  0.00000000
## pct_asian                  0.00000000
## pct_other_race             0.00000000
## pct_married_households     0.00000000
##getting rid of variables with missing values.

cancer_reg = cancer_reg %>% dplyr::select(-pct_some_col18_24, -pct_private_coverage_alone, -binned_inc, -median_age) 

  ##removed binned_inc since we already have median income and median age since it is avg of median age female and male. so we'll build model with those those factors instead. 


##percentage missing for pct_employed16_over is  ~5%, checking to see if its correlated with the outcome 
reg = lm(target_death_rate~pct_employed16_over, data = cancer_reg) %>%
  summary()

##Since the p-value is small, we will retain pct_employed16_over

cancer_reg = cancer_reg %>% select(-county, -state) %>%
  mutate(mortality = avg_deaths_per_year/pop_est2015, prevalence = avg_ann_count/pop_est2015) %>%
  dplyr::select(-pop_est2015, -avg_ann_count, -avg_deaths_per_year) %>%
 mutate(study_per_cap =  
        as.factor(ifelse(study_per_cap == 0, "none", 
                         ifelse(study_per_cap < quantile(study_per_cap, .25), "low",
                         ifelse(study_per_cap < quantile(study_per_cap, .5), "medium" ,
                                ifelse(study_per_cap < quantile(study_per_cap, .75), "high", "very high")))))) %>%
  mutate(pct_non_white = pct_black+ pct_asian + pct_other_race) %>%
  dplyr::select(-pct_black, -pct_asian, -pct_other_race) ##since the number of white people are a lot higher, putting the other minorities under a single variable.

  ##Since count itself can be misleading, taking proportion will give us a better model. 
 ##removed state and county variables since we're building a predictive model, area doesn't really matter.
 ##changed study_per_cap to factor variable

exploratory analysis

hist(cancer_reg$target_death_rate) #outcome is normally distributed

hist(cancer_reg$pct_private_coverage)

hist(cancer_reg$pct_public_coverage)

hist(cancer_reg$pct_emp_priv_coverage)

hist(cancer_reg$pct_public_coverage_alone)

hist(cancer_reg$incidence_rate) ##right skewed

hist(cancer_reg$med_income) #somewhat right skewed-mostly ok

hist(cancer_reg$poverty_percent)

hist(cancer_reg$median_age_male)

hist(cancer_reg$median_age_female)

hist(cancer_reg$avg_household_size) ##left skewed

hist(cancer_reg$percent_married)

hist(cancer_reg$pct_no_hs18_24) #somehwat right

hist(cancer_reg$pct_hs18_24)

hist(cancer_reg$pct_bach_deg18_24)#right skewed

hist(cancer_reg$pct_hs25_over)

hist(cancer_reg$pct_bach_deg25_over)

hist(cancer_reg$pct_employed16_over)

hist(cancer_reg$pct_unemployed16_over)

hist(cancer_reg$pct_white) #left  skewed

hist(cancer_reg$pct_non_white) #right skewed

hist(cancer_reg$pct_married_households)

hist(cancer_reg$birth_rate)

hist(cancer_reg$mortality)

hist(cancer_reg$prevalence) #right skewed

##they are all almost normally distributed

descriptive statistics: cont variable

cont_var = dplyr::select(cancer_reg, target_death_rate, everything(), -c(study_per_cap))
knitr::kable(summary(cont_var), caption = "descriptive statistics for continuous variables")
descriptive statistics for continuous variables
target_death_rate incidence_rate med_income poverty_percent median_age_male median_age_female avg_household_size percent_married pct_no_hs18_24 pct_hs18_24 pct_bach_deg18_24 pct_hs25_over pct_bach_deg25_over pct_employed16_over pct_unemployed16_over pct_private_coverage pct_emp_priv_coverage pct_public_coverage pct_public_coverage_alone pct_white pct_married_households birth_rate mortality prevalence pct_non_white
Min. : 59.7 Min. : 201.3 Min. : 22640 Min. : 3.20 Min. :22.40 Min. :22.30 Min. :0.0221 Min. :23.10 Min. : 0.00 Min. : 0.0 Min. : 0.000 Min. : 7.50 Min. : 2.50 Min. :17.60 Min. : 0.400 Min. :22.30 Min. :13.5 Min. :11.20 Min. : 2.60 Min. : 10.20 Min. :22.99 Min. : 0.000 Min. :0.000485 Min. :0.0009281 Min. : 0.000
1st Qu.:161.2 1st Qu.: 420.3 1st Qu.: 38882 1st Qu.:12.15 1st Qu.:36.35 1st Qu.:39.10 1st Qu.:2.3700 1st Qu.:47.75 1st Qu.:12.80 1st Qu.:29.2 1st Qu.: 3.100 1st Qu.:30.40 1st Qu.: 9.40 1st Qu.:48.60 1st Qu.: 5.500 1st Qu.:57.20 1st Qu.:34.5 1st Qu.:30.90 1st Qu.:14.85 1st Qu.: 77.30 1st Qu.:47.76 1st Qu.: 4.521 1st Qu.:0.001888 1st Qu.:0.0048022 1st Qu.: 1.964
Median :178.1 Median : 453.5 Median : 45207 Median :15.90 Median :39.60 Median :42.40 Median :2.5000 Median :52.40 Median :17.10 Median :34.7 Median : 5.400 Median :35.30 Median :12.30 Median :54.50 Median : 7.600 Median :65.10 Median :41.1 Median :36.30 Median :18.80 Median : 90.06 Median :51.67 Median : 5.381 Median :0.002290 Median :0.0056236 Median : 5.569
Mean :178.7 Mean : 448.3 Mean : 47063 Mean :16.88 Mean :39.57 Mean :42.15 Mean :2.4797 Mean :51.77 Mean :18.22 Mean :35.0 Mean : 6.158 Mean :34.80 Mean :13.28 Mean :54.15 Mean : 7.852 Mean :64.35 Mean :41.2 Mean :36.25 Mean :19.24 Mean : 83.65 Mean :51.24 Mean : 5.640 Mean :0.002287 Mean :0.0232443 Mean :12.345
3rd Qu.:195.2 3rd Qu.: 480.9 3rd Qu.: 52492 3rd Qu.:20.40 3rd Qu.:42.50 3rd Qu.:45.30 3rd Qu.:2.6300 3rd Qu.:56.40 3rd Qu.:22.70 3rd Qu.:40.7 3rd Qu.: 8.200 3rd Qu.:39.65 3rd Qu.:16.10 3rd Qu.:60.30 3rd Qu.: 9.700 3rd Qu.:72.10 3rd Qu.:47.7 3rd Qu.:41.55 3rd Qu.:23.10 3rd Qu.: 95.45 3rd Qu.:55.40 3rd Qu.: 6.494 3rd Qu.:0.002681 3rd Qu.:0.0064874 3rd Qu.:16.974
Max. :362.8 Max. :1206.9 Max. :125635 Max. :47.40 Max. :64.70 Max. :65.70 Max. :3.9700 Max. :72.50 Max. :64.10 Max. :72.5 Max. :51.800 Max. :54.80 Max. :42.20 Max. :80.10 Max. :29.400 Max. :92.30 Max. :70.7 Max. :65.10 Max. :46.60 Max. :100.00 Max. :78.08 Max. :21.326 Max. :0.005136 Max. :2.3675123 Max. :86.066
NA NA NA NA NA NA NA NA NA NA NA NA NA NA’s :152 NA NA NA NA NA NA NA NA NA NA NA

descriptive stat: cat variable

cancer_reg %>%
group_by(study_per_cap) %>%
count() %>%
ungroup() %>%
mutate(prop = n / sum(n)) %>%
knitr::kable(digits = 2, caption = "Descriptive Statistics for clinical trial")
Descriptive Statistics for clinical trial
study_per_cap n prop
high 354 0.12
none 1931 0.63
very high 762 0.25

subset

(not considering interaction)

multi.fit = lm(target_death_rate ~ ., data = cancer_reg)
summary(multi.fit)
## 
## Call:
## lm(formula = target_death_rate ~ ., data = cancer_reg)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -82.182  -7.532   0.058   7.141  84.257 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                2.032e+02  1.058e+01  19.206  < 2e-16 ***
## incidence_rate             8.692e-02  5.297e-03  16.410  < 2e-16 ***
## med_income                 2.619e-04  5.343e-05   4.902 1.00e-06 ***
## poverty_percent           -3.129e-03  1.075e-01  -0.029 0.976775    
## study_per_capnone         -9.543e-01  8.107e-01  -1.177 0.239270    
## study_per_capvery high    -1.389e+00  8.532e-01  -1.628 0.103542    
## median_age_male           -6.012e-01  1.410e-01  -4.263 2.08e-05 ***
## median_age_female         -2.264e+00  1.499e-01 -15.100  < 2e-16 ***
## avg_household_size        -1.197e-02  6.501e-01  -0.018 0.985307    
## percent_married            1.174e-02  1.177e-01   0.100 0.920606    
## pct_no_hs18_24            -1.113e-03  3.771e-02  -0.030 0.976454    
## pct_hs18_24                2.967e-01  3.324e-02   8.927  < 2e-16 ***
## pct_bach_deg18_24          3.625e-02  7.233e-02   0.501 0.616350    
## pct_hs25_over              8.539e-02  6.429e-02   1.328 0.184219    
## pct_bach_deg25_over       -3.547e-01  1.052e-01  -3.373 0.000753 ***
## pct_employed16_over       -6.641e-01  7.320e-02  -9.072  < 2e-16 ***
## pct_unemployed16_over      7.958e-01  1.121e-01   7.099 1.58e-12 ***
## pct_private_coverage      -2.716e-01  8.736e-02  -3.109 0.001896 ** 
## pct_emp_priv_coverage      4.285e-01  7.042e-02   6.085 1.32e-09 ***
## pct_public_coverage       -1.964e+00  1.544e-01 -12.720  < 2e-16 ***
## pct_public_coverage_alone  1.802e+00  1.904e-01   9.464  < 2e-16 ***
## pct_white                 -1.341e-01  3.874e-02  -3.460 0.000548 ***
## pct_married_households     5.822e-02  1.134e-01   0.513 0.607691    
## birth_rate                -5.153e-01  1.292e-01  -3.989 6.81e-05 ***
## mortality                  4.949e+04  7.916e+02  62.524  < 2e-16 ***
## prevalence                -1.699e+01  2.422e+00  -7.013 2.89e-12 ***
## pct_non_white             -4.812e-02  3.746e-02  -1.284 0.199120    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.71 on 2868 degrees of freedom
##   (152 observations deleted due to missingness)
## Multiple R-squared:  0.7877, Adjusted R-squared:  0.7857 
## F-statistic: 409.2 on 26 and 2868 DF,  p-value: < 2.2e-16
cancer_subset = cancer_reg %>%
  select(target_death_rate, incidence_rate, med_income, median_age_male, median_age_female, pct_hs18_24, pct_bach_deg25_over, pct_employed16_over, pct_unemployed16_over, pct_public_coverage_alone, birth_rate, mortality, prevalence, pct_private_coverage, pct_emp_priv_coverage, pct_public_coverage )

coverage

reg1 = lm(target_death_rate~pct_private_coverage, data = cancer_reg) %>%
  summary()
reg2 = lm(target_death_rate~pct_emp_priv_coverage, data = cancer_reg) %>%
  summary()
reg3 = lm(target_death_rate~pct_public_coverage, data = cancer_reg) %>%
  summary()
reg4 = lm(target_death_rate~pct_public_coverage_alone , data = cancer_reg) %>%
  summary()

##I'd pick public_coverage alone since it has max r^2

plot(cancer_reg$pct_private_coverage, cancer_reg$target_death_rate)
abline(reg1,lwd = 2,col = 2)
## Warning in abline(reg1, lwd = 2, col = 2): only using the first two of 8
## regression coefficients

plot(cancer_reg$pct_emp_priv_coverage, cancer_reg$target_death_rate)
abline(reg2,lwd = 2,col = 2)
## Warning in abline(reg2, lwd = 2, col = 2): only using the first two of 8
## regression coefficients

plot(cancer_reg$pct_public_coverage, cancer_reg$target_death_rate)
abline(reg3,lwd = 2,col = 2)
## Warning in abline(reg3, lwd = 2, col = 2): only using the first two of 8
## regression coefficients

plot(cancer_reg$pct_public_coverage_alone, cancer_reg$target_death_rate)
abline(reg4,lwd = 2,col = 2)
## Warning in abline(reg4, lwd = 2, col = 2): only using the first two of 8
## regression coefficients

Automatic variable selection by stepwise regression

# Fit MLR with cancer_subset
canc.fit <- lm(target_death_rate ~ ., data = cancer_reg)

# Automatic procedure: Stepwise regression to select model
step.fit <- step(canc.fit, direction='backward')
## Start:  AIC=14748.59
## target_death_rate ~ incidence_rate + med_income + poverty_percent + 
##     study_per_cap + median_age_male + median_age_female + avg_household_size + 
##     percent_married + pct_no_hs18_24 + pct_hs18_24 + pct_bach_deg18_24 + 
##     pct_hs25_over + pct_bach_deg25_over + pct_employed16_over + 
##     pct_unemployed16_over + pct_private_coverage + pct_emp_priv_coverage + 
##     pct_public_coverage + pct_public_coverage_alone + pct_white + 
##     pct_married_households + birth_rate + mortality + prevalence + 
##     pct_non_white
## 
##                             Df Sum of Sq     RSS   AIC
## - avg_household_size         1         0  463514 14747
## - poverty_percent            1         0  463515 14747
## - pct_no_hs18_24             1         0  463515 14747
## - percent_married            1         2  463516 14747
## - pct_bach_deg18_24          1        41  463555 14747
## - pct_married_households     1        43  463557 14747
## - study_per_cap              2       429  463944 14747
## - pct_non_white              1       267  463781 14748
## - pct_hs25_over              1       285  463799 14748
## <none>                                    463514 14749
## - pct_private_coverage       1      1562  465076 14756
## - pct_bach_deg25_over        1      1839  465353 14758
## - pct_white                  1      1935  465449 14759
## - birth_rate                 1      2571  466086 14763
## - median_age_male            1      2937  466451 14765
## - med_income                 1      3884  467398 14771
## - pct_emp_priv_coverage      1      5983  469498 14784
## - prevalence                 1      7949  471463 14796
## - pct_unemployed16_over      1      8145  471659 14797
## - pct_hs18_24                1     12880  476394 14826
## - pct_employed16_over        1     13302  476816 14828
## - pct_public_coverage_alone  1     14475  477990 14836
## - pct_public_coverage        1     26151  489666 14906
## - median_age_female          1     36852  500367 14968
## - incidence_rate             1     43521  507035 15006
## - mortality                  1    631789 1095304 17236
## 
## Step:  AIC=14746.59
## target_death_rate ~ incidence_rate + med_income + poverty_percent + 
##     study_per_cap + median_age_male + median_age_female + percent_married + 
##     pct_no_hs18_24 + pct_hs18_24 + pct_bach_deg18_24 + pct_hs25_over + 
##     pct_bach_deg25_over + pct_employed16_over + pct_unemployed16_over + 
##     pct_private_coverage + pct_emp_priv_coverage + pct_public_coverage + 
##     pct_public_coverage_alone + pct_white + pct_married_households + 
##     birth_rate + mortality + prevalence + pct_non_white
## 
##                             Df Sum of Sq     RSS   AIC
## - poverty_percent            1         0  463515 14745
## - pct_no_hs18_24             1         0  463515 14745
## - percent_married            1         2  463516 14745
## - pct_bach_deg18_24          1        41  463555 14745
## - pct_married_households     1        44  463559 14745
## - study_per_cap              2       429  463944 14745
## - pct_non_white              1       267  463781 14746
## - pct_hs25_over              1       285  463800 14746
## <none>                                    463514 14747
## - pct_private_coverage       1      1567  465082 14754
## - pct_bach_deg25_over        1      1840  465354 14756
## - pct_white                  1      1945  465460 14757
## - birth_rate                 1      2574  466088 14761
## - median_age_male            1      2937  466452 14763
## - med_income                 1      3898  467412 14769
## - pct_emp_priv_coverage      1      5987  469501 14782
## - prevalence                 1      7951  471465 14794
## - pct_unemployed16_over      1      8162  471676 14795
## - pct_hs18_24                1     12913  476428 14824
## - pct_employed16_over        1     13363  476877 14827
## - pct_public_coverage_alone  1     14476  477991 14834
## - pct_public_coverage        1     26152  489666 14904
## - median_age_female          1     37059  500574 14967
## - incidence_rate             1     43652  507167 15005
## - mortality                  1    631789 1095304 17234
## 
## Step:  AIC=14744.59
## target_death_rate ~ incidence_rate + med_income + study_per_cap + 
##     median_age_male + median_age_female + percent_married + pct_no_hs18_24 + 
##     pct_hs18_24 + pct_bach_deg18_24 + pct_hs25_over + pct_bach_deg25_over + 
##     pct_employed16_over + pct_unemployed16_over + pct_private_coverage + 
##     pct_emp_priv_coverage + pct_public_coverage + pct_public_coverage_alone + 
##     pct_white + pct_married_households + birth_rate + mortality + 
##     prevalence + pct_non_white
## 
##                             Df Sum of Sq     RSS   AIC
## - pct_no_hs18_24             1         0  463515 14743
## - percent_married            1         2  463516 14743
## - pct_bach_deg18_24          1        40  463555 14743
## - pct_married_households     1        45  463559 14743
## - study_per_cap              2       429  463944 14743
## - pct_non_white              1       270  463785 14744
## - pct_hs25_over              1       286  463801 14744
## <none>                                    463515 14745
## - pct_private_coverage       1      1597  465112 14753
## - pct_bach_deg25_over        1      1859  465374 14754
## - pct_white                  1      1947  465462 14755
## - birth_rate                 1      2579  466094 14759
## - median_age_male            1      2943  466458 14761
## - med_income                 1      5024  468538 14774
## - pct_emp_priv_coverage      1      6003  469518 14780
## - prevalence                 1      7951  471465 14792
## - pct_unemployed16_over      1      8163  471677 14793
## - pct_hs18_24                1     12962  476477 14822
## - pct_public_coverage_alone  1     14683  478197 14833
## - pct_employed16_over        1     15098  478613 14835
## - pct_public_coverage        1     26245  489759 14902
## - median_age_female          1     37452  500966 14968
## - incidence_rate             1     43655  507170 15003
## - mortality                  1    633305 1096819 17236
## 
## Step:  AIC=14742.59
## target_death_rate ~ incidence_rate + med_income + study_per_cap + 
##     median_age_male + median_age_female + percent_married + pct_hs18_24 + 
##     pct_bach_deg18_24 + pct_hs25_over + pct_bach_deg25_over + 
##     pct_employed16_over + pct_unemployed16_over + pct_private_coverage + 
##     pct_emp_priv_coverage + pct_public_coverage + pct_public_coverage_alone + 
##     pct_white + pct_married_households + birth_rate + mortality + 
##     prevalence + pct_non_white
## 
##                             Df Sum of Sq     RSS   AIC
## - percent_married            1         2  463516 14741
## - pct_bach_deg18_24          1        42  463557 14741
## - pct_married_households     1        45  463559 14741
## - study_per_cap              2       432  463947 14741
## - pct_non_white              1       270  463785 14742
## - pct_hs25_over              1       288  463802 14742
## <none>                                    463515 14743
## - pct_private_coverage       1      1692  465207 14751
## - pct_bach_deg25_over        1      1874  465388 14752
## - pct_white                  1      1957  465472 14753
## - birth_rate                 1      2588  466103 14757
## - median_age_male            1      2945  466459 14759
## - med_income                 1      5144  468658 14772
## - pct_emp_priv_coverage      1      6004  469519 14778
## - prevalence                 1      7969  471484 14790
## - pct_unemployed16_over      1      8228  471743 14792
## - pct_hs18_24                1     13894  477409 14826
## - pct_public_coverage_alone  1     14766  478281 14831
## - pct_employed16_over        1     15147  478662 14834
## - pct_public_coverage        1     26245  489760 14900
## - median_age_female          1     37967  501482 14968
## - incidence_rate             1     43723  507238 15002
## - mortality                  1    637651 1101166 17246
## 
## Step:  AIC=14740.61
## target_death_rate ~ incidence_rate + med_income + study_per_cap + 
##     median_age_male + median_age_female + pct_hs18_24 + pct_bach_deg18_24 + 
##     pct_hs25_over + pct_bach_deg25_over + pct_employed16_over + 
##     pct_unemployed16_over + pct_private_coverage + pct_emp_priv_coverage + 
##     pct_public_coverage + pct_public_coverage_alone + pct_white + 
##     pct_married_households + birth_rate + mortality + prevalence + 
##     pct_non_white
## 
##                             Df Sum of Sq     RSS   AIC
## - pct_bach_deg18_24          1        43  463560 14739
## - study_per_cap              2       431  463947 14739
## - pct_married_households     1       196  463713 14740
## - pct_non_white              1       271  463788 14740
## - pct_hs25_over              1       286  463803 14740
## <none>                                    463516 14741
## - pct_private_coverage       1      1690  465207 14749
## - pct_bach_deg25_over        1      1885  465401 14750
## - pct_white                  1      1959  465475 14751
## - birth_rate                 1      2621  466138 14755
## - median_age_male            1      3046  466562 14758
## - med_income                 1      5203  468720 14771
## - pct_emp_priv_coverage      1      6124  469640 14777
## - prevalence                 1      7972  471488 14788
## - pct_unemployed16_over      1      8235  471752 14790
## - pct_hs18_24                1     14029  477545 14825
## - pct_public_coverage_alone  1     14773  478289 14829
## - pct_employed16_over        1     21243  484760 14868
## - pct_public_coverage        1     26303  489820 14898
## - median_age_female          1     37974  501490 14967
## - incidence_rate             1     43768  507284 15000
## - mortality                  1    661380 1124896 17305
## 
## Step:  AIC=14738.88
## target_death_rate ~ incidence_rate + med_income + study_per_cap + 
##     median_age_male + median_age_female + pct_hs18_24 + pct_hs25_over + 
##     pct_bach_deg25_over + pct_employed16_over + pct_unemployed16_over + 
##     pct_private_coverage + pct_emp_priv_coverage + pct_public_coverage + 
##     pct_public_coverage_alone + pct_white + pct_married_households + 
##     birth_rate + mortality + prevalence + pct_non_white
## 
##                             Df Sum of Sq     RSS   AIC
## - study_per_cap              2       434  463994 14738
## - pct_married_households     1       169  463729 14738
## - pct_non_white              1       261  463821 14738
## - pct_hs25_over              1       304  463863 14739
## <none>                                    463560 14739
## - pct_private_coverage       1      1651  465210 14747
## - pct_bach_deg25_over        1      1846  465406 14748
## - pct_white                  1      1930  465490 14749
## - birth_rate                 1      2677  466237 14754
## - median_age_male            1      3010  466569 14756
## - med_income                 1      5417  468977 14770
## - pct_emp_priv_coverage      1      6089  469649 14775
## - prevalence                 1      7932  471491 14786
## - pct_unemployed16_over      1      8194  471754 14788
## - pct_hs18_24                1     14305  477864 14825
## - pct_public_coverage_alone  1     15143  478703 14830
## - pct_employed16_over        1     21206  484766 14866
## - pct_public_coverage        1     26596  490156 14898
## - median_age_female          1     37989  501549 14965
## - incidence_rate             1     43748  507308 14998
## - mortality                  1    661336 1124896 17303
## 
## Step:  AIC=14737.59
## target_death_rate ~ incidence_rate + med_income + median_age_male + 
##     median_age_female + pct_hs18_24 + pct_hs25_over + pct_bach_deg25_over + 
##     pct_employed16_over + pct_unemployed16_over + pct_private_coverage + 
##     pct_emp_priv_coverage + pct_public_coverage + pct_public_coverage_alone + 
##     pct_white + pct_married_households + birth_rate + mortality + 
##     prevalence + pct_non_white
## 
##                             Df Sum of Sq     RSS   AIC
## - pct_married_households     1       200  464194 14737
## - pct_non_white              1       225  464219 14737
## <none>                                    463994 14738
## - pct_hs25_over              1       331  464325 14738
## - pct_private_coverage       1      1657  465651 14746
## - pct_white                  1      1867  465861 14747
## - pct_bach_deg25_over        1      1888  465882 14747
## - birth_rate                 1      2763  466757 14753
## - median_age_male            1      2990  466984 14754
## - med_income                 1      5612  469606 14770
## - pct_emp_priv_coverage      1      6006  470000 14773
## - prevalence                 1      7965  471958 14785
## - pct_unemployed16_over      1      8349  472343 14787
## - pct_hs18_24                1     14221  478215 14823
## - pct_public_coverage_alone  1     15134  479128 14828
## - pct_employed16_over        1     21337  485331 14866
## - pct_public_coverage        1     26622  490616 14897
## - median_age_female          1     38248  502242 14965
## - incidence_rate             1     44187  508181 14999
## - mortality                  1    661507 1125501 17301
## 
## Step:  AIC=14736.83
## target_death_rate ~ incidence_rate + med_income + median_age_male + 
##     median_age_female + pct_hs18_24 + pct_hs25_over + pct_bach_deg25_over + 
##     pct_employed16_over + pct_unemployed16_over + pct_private_coverage + 
##     pct_emp_priv_coverage + pct_public_coverage + pct_public_coverage_alone + 
##     pct_white + birth_rate + mortality + prevalence + pct_non_white
## 
##                             Df Sum of Sq     RSS   AIC
## - pct_non_white              1       262  464456 14736
## <none>                                    464194 14737
## - pct_hs25_over              1       339  464533 14737
## - pct_private_coverage       1      1573  465767 14745
## - pct_white                  1      1683  465877 14745
## - pct_bach_deg25_over        1      2435  466629 14750
## - birth_rate                 1      2693  466887 14752
## - median_age_male            1      2882  467076 14753
## - pct_emp_priv_coverage      1      5806  470000 14771
## - med_income                 1      7619  471813 14782
## - prevalence                 1      7936  472130 14784
## - pct_unemployed16_over      1      8183  472377 14785
## - pct_hs18_24                1     14575  478769 14824
## - pct_public_coverage_alone  1     15425  479619 14830
## - pct_employed16_over        1     22509  486703 14872
## - pct_public_coverage        1     28133  492327 14905
## - median_age_female          1     38150  502344 14964
## - incidence_rate             1     44051  508245 14997
## - mortality                  1    668614 1132808 17318
## 
## Step:  AIC=14736.47
## target_death_rate ~ incidence_rate + med_income + median_age_male + 
##     median_age_female + pct_hs18_24 + pct_hs25_over + pct_bach_deg25_over + 
##     pct_employed16_over + pct_unemployed16_over + pct_private_coverage + 
##     pct_emp_priv_coverage + pct_public_coverage + pct_public_coverage_alone + 
##     pct_white + birth_rate + mortality + prevalence
## 
##                             Df Sum of Sq     RSS   AIC
## <none>                                    464456 14736
## - pct_hs25_over              1       427  464883 14737
## - pct_private_coverage       1      1717  466173 14745
## - pct_bach_deg25_over        1      2280  466736 14749
## - birth_rate                 1      2564  467020 14750
## - pct_white                  1      2736  467192 14752
## - median_age_male            1      2781  467238 14752
## - pct_emp_priv_coverage      1      5646  470102 14769
## - prevalence                 1      7829  472285 14783
## - pct_unemployed16_over      1      8117  472573 14785
## - med_income                 1      8262  472719 14786
## - pct_hs18_24                1     14840  479297 14826
## - pct_public_coverage_alone  1     15181  479637 14828
## - pct_employed16_over        1     22264  486720 14870
## - pct_public_coverage        1     27887  492343 14903
## - median_age_female          1     40509  504965 14977
## - incidence_rate             1     43795  508251 14995
## - mortality                  1    669213 1133669 17318

Criterion based procedure for variable selection

# Summary of models for each size
b<-regsubsets(target_death_rate ~ ., nvmax = 26, data=cancer_reg)
(rs<-summary(b))
## Subset selection object
## Call: regsubsets.formula(target_death_rate ~ ., nvmax = 26, data = cancer_reg)
## 26 Variables  (and intercept)
##                           Forced in Forced out
## incidence_rate                FALSE      FALSE
## med_income                    FALSE      FALSE
## poverty_percent               FALSE      FALSE
## study_per_capnone             FALSE      FALSE
## study_per_capvery high        FALSE      FALSE
## median_age_male               FALSE      FALSE
## median_age_female             FALSE      FALSE
## avg_household_size            FALSE      FALSE
## percent_married               FALSE      FALSE
## pct_no_hs18_24                FALSE      FALSE
## pct_hs18_24                   FALSE      FALSE
## pct_bach_deg18_24             FALSE      FALSE
## pct_hs25_over                 FALSE      FALSE
## pct_bach_deg25_over           FALSE      FALSE
## pct_employed16_over           FALSE      FALSE
## pct_unemployed16_over         FALSE      FALSE
## pct_private_coverage          FALSE      FALSE
## pct_emp_priv_coverage         FALSE      FALSE
## pct_public_coverage           FALSE      FALSE
## pct_public_coverage_alone     FALSE      FALSE
## pct_white                     FALSE      FALSE
## pct_married_households        FALSE      FALSE
## birth_rate                    FALSE      FALSE
## mortality                     FALSE      FALSE
## prevalence                    FALSE      FALSE
## pct_non_white                 FALSE      FALSE
## 1 subsets of each size up to 26
## Selection Algorithm: exhaustive
##           incidence_rate med_income poverty_percent study_per_capnone
## 1  ( 1 )  " "            " "        " "             " "              
## 2  ( 1 )  " "            " "        " "             " "              
## 3  ( 1 )  " "            " "        " "             " "              
## 4  ( 1 )  "*"            " "        " "             " "              
## 5  ( 1 )  "*"            " "        " "             " "              
## 6  ( 1 )  "*"            " "        " "             " "              
## 7  ( 1 )  "*"            " "        " "             " "              
## 8  ( 1 )  "*"            " "        " "             " "              
## 9  ( 1 )  "*"            " "        " "             " "              
## 10  ( 1 ) "*"            " "        " "             " "              
## 11  ( 1 ) "*"            " "        " "             " "              
## 12  ( 1 ) "*"            "*"        " "             " "              
## 13  ( 1 ) "*"            "*"        " "             " "              
## 14  ( 1 ) "*"            "*"        " "             " "              
## 15  ( 1 ) "*"            "*"        " "             " "              
## 16  ( 1 ) "*"            "*"        " "             " "              
## 17  ( 1 ) "*"            "*"        " "             " "              
## 18  ( 1 ) "*"            "*"        " "             " "              
## 19  ( 1 ) "*"            "*"        " "             " "              
## 20  ( 1 ) "*"            "*"        " "             "*"              
## 21  ( 1 ) "*"            "*"        " "             "*"              
## 22  ( 1 ) "*"            "*"        " "             "*"              
## 23  ( 1 ) "*"            "*"        " "             "*"              
## 24  ( 1 ) "*"            "*"        " "             "*"              
## 25  ( 1 ) "*"            "*"        "*"             "*"              
## 26  ( 1 ) "*"            "*"        "*"             "*"              
##           study_per_capvery high median_age_male median_age_female
## 1  ( 1 )  " "                    " "             " "              
## 2  ( 1 )  " "                    " "             "*"              
## 3  ( 1 )  " "                    " "             "*"              
## 4  ( 1 )  " "                    " "             "*"              
## 5  ( 1 )  " "                    " "             "*"              
## 6  ( 1 )  " "                    " "             "*"              
## 7  ( 1 )  " "                    " "             "*"              
## 8  ( 1 )  " "                    " "             "*"              
## 9  ( 1 )  " "                    " "             "*"              
## 10  ( 1 ) " "                    " "             "*"              
## 11  ( 1 ) " "                    " "             "*"              
## 12  ( 1 ) " "                    " "             "*"              
## 13  ( 1 ) " "                    " "             "*"              
## 14  ( 1 ) " "                    " "             "*"              
## 15  ( 1 ) " "                    "*"             "*"              
## 16  ( 1 ) " "                    "*"             "*"              
## 17  ( 1 ) " "                    "*"             "*"              
## 18  ( 1 ) " "                    "*"             "*"              
## 19  ( 1 ) "*"                    "*"             "*"              
## 20  ( 1 ) "*"                    "*"             "*"              
## 21  ( 1 ) "*"                    "*"             "*"              
## 22  ( 1 ) "*"                    "*"             "*"              
## 23  ( 1 ) "*"                    "*"             "*"              
## 24  ( 1 ) "*"                    "*"             "*"              
## 25  ( 1 ) "*"                    "*"             "*"              
## 26  ( 1 ) "*"                    "*"             "*"              
##           avg_household_size percent_married pct_no_hs18_24 pct_hs18_24
## 1  ( 1 )  " "                " "             " "            " "        
## 2  ( 1 )  " "                " "             " "            " "        
## 3  ( 1 )  " "                " "             " "            " "        
## 4  ( 1 )  " "                " "             " "            " "        
## 5  ( 1 )  " "                " "             " "            "*"        
## 6  ( 1 )  " "                " "             " "            " "        
## 7  ( 1 )  " "                " "             " "            "*"        
## 8  ( 1 )  " "                " "             " "            "*"        
## 9  ( 1 )  " "                " "             " "            "*"        
## 10  ( 1 ) " "                " "             " "            "*"        
## 11  ( 1 ) " "                " "             " "            "*"        
## 12  ( 1 ) " "                " "             " "            "*"        
## 13  ( 1 ) " "                " "             " "            "*"        
## 14  ( 1 ) " "                " "             " "            "*"        
## 15  ( 1 ) " "                " "             " "            "*"        
## 16  ( 1 ) " "                " "             " "            "*"        
## 17  ( 1 ) " "                " "             " "            "*"        
## 18  ( 1 ) " "                " "             " "            "*"        
## 19  ( 1 ) " "                " "             " "            "*"        
## 20  ( 1 ) " "                " "             " "            "*"        
## 21  ( 1 ) " "                " "             " "            "*"        
## 22  ( 1 ) " "                " "             " "            "*"        
## 23  ( 1 ) " "                "*"             " "            "*"        
## 24  ( 1 ) " "                "*"             "*"            "*"        
## 25  ( 1 ) " "                "*"             "*"            "*"        
## 26  ( 1 ) "*"                "*"             "*"            "*"        
##           pct_bach_deg18_24 pct_hs25_over pct_bach_deg25_over
## 1  ( 1 )  " "               " "           " "                
## 2  ( 1 )  " "               " "           " "                
## 3  ( 1 )  " "               " "           " "                
## 4  ( 1 )  " "               " "           " "                
## 5  ( 1 )  " "               " "           " "                
## 6  ( 1 )  " "               " "           " "                
## 7  ( 1 )  " "               " "           " "                
## 8  ( 1 )  " "               " "           " "                
## 9  ( 1 )  " "               " "           " "                
## 10  ( 1 ) " "               " "           " "                
## 11  ( 1 ) " "               " "           " "                
## 12  ( 1 ) " "               " "           "*"                
## 13  ( 1 ) " "               " "           "*"                
## 14  ( 1 ) " "               " "           "*"                
## 15  ( 1 ) " "               " "           "*"                
## 16  ( 1 ) " "               " "           "*"                
## 17  ( 1 ) " "               "*"           "*"                
## 18  ( 1 ) " "               "*"           "*"                
## 19  ( 1 ) " "               "*"           "*"                
## 20  ( 1 ) " "               "*"           "*"                
## 21  ( 1 ) " "               "*"           "*"                
## 22  ( 1 ) "*"               "*"           "*"                
## 23  ( 1 ) "*"               "*"           "*"                
## 24  ( 1 ) "*"               "*"           "*"                
## 25  ( 1 ) "*"               "*"           "*"                
## 26  ( 1 ) "*"               "*"           "*"                
##           pct_employed16_over pct_unemployed16_over pct_private_coverage
## 1  ( 1 )  " "                 " "                   " "                 
## 2  ( 1 )  " "                 " "                   " "                 
## 3  ( 1 )  " "                 "*"                   " "                 
## 4  ( 1 )  " "                 "*"                   " "                 
## 5  ( 1 )  " "                 "*"                   " "                 
## 6  ( 1 )  "*"                 " "                   " "                 
## 7  ( 1 )  "*"                 " "                   " "                 
## 8  ( 1 )  "*"                 " "                   " "                 
## 9  ( 1 )  "*"                 "*"                   " "                 
## 10  ( 1 ) "*"                 "*"                   " "                 
## 11  ( 1 ) "*"                 "*"                   " "                 
## 12  ( 1 ) "*"                 "*"                   " "                 
## 13  ( 1 ) "*"                 "*"                   " "                 
## 14  ( 1 ) "*"                 "*"                   " "                 
## 15  ( 1 ) "*"                 "*"                   " "                 
## 16  ( 1 ) "*"                 "*"                   "*"                 
## 17  ( 1 ) "*"                 "*"                   "*"                 
## 18  ( 1 ) "*"                 "*"                   "*"                 
## 19  ( 1 ) "*"                 "*"                   "*"                 
## 20  ( 1 ) "*"                 "*"                   "*"                 
## 21  ( 1 ) "*"                 "*"                   "*"                 
## 22  ( 1 ) "*"                 "*"                   "*"                 
## 23  ( 1 ) "*"                 "*"                   "*"                 
## 24  ( 1 ) "*"                 "*"                   "*"                 
## 25  ( 1 ) "*"                 "*"                   "*"                 
## 26  ( 1 ) "*"                 "*"                   "*"                 
##           pct_emp_priv_coverage pct_public_coverage
## 1  ( 1 )  " "                   " "                
## 2  ( 1 )  " "                   " "                
## 3  ( 1 )  " "                   " "                
## 4  ( 1 )  " "                   " "                
## 5  ( 1 )  " "                   " "                
## 6  ( 1 )  " "                   "*"                
## 7  ( 1 )  " "                   "*"                
## 8  ( 1 )  "*"                   "*"                
## 9  ( 1 )  "*"                   "*"                
## 10  ( 1 ) "*"                   "*"                
## 11  ( 1 ) "*"                   "*"                
## 12  ( 1 ) "*"                   "*"                
## 13  ( 1 ) "*"                   "*"                
## 14  ( 1 ) "*"                   "*"                
## 15  ( 1 ) "*"                   "*"                
## 16  ( 1 ) "*"                   "*"                
## 17  ( 1 ) "*"                   "*"                
## 18  ( 1 ) "*"                   "*"                
## 19  ( 1 ) "*"                   "*"                
## 20  ( 1 ) "*"                   "*"                
## 21  ( 1 ) "*"                   "*"                
## 22  ( 1 ) "*"                   "*"                
## 23  ( 1 ) "*"                   "*"                
## 24  ( 1 ) "*"                   "*"                
## 25  ( 1 ) "*"                   "*"                
## 26  ( 1 ) "*"                   "*"                
##           pct_public_coverage_alone pct_white pct_married_households
## 1  ( 1 )  " "                       " "       " "                   
## 2  ( 1 )  " "                       " "       " "                   
## 3  ( 1 )  " "                       " "       " "                   
## 4  ( 1 )  " "                       " "       " "                   
## 5  ( 1 )  " "                       " "       " "                   
## 6  ( 1 )  "*"                       " "       " "                   
## 7  ( 1 )  "*"                       " "       " "                   
## 8  ( 1 )  "*"                       " "       " "                   
## 9  ( 1 )  "*"                       " "       " "                   
## 10  ( 1 ) "*"                       " "       " "                   
## 11  ( 1 ) "*"                       "*"       " "                   
## 12  ( 1 ) "*"                       " "       " "                   
## 13  ( 1 ) "*"                       "*"       " "                   
## 14  ( 1 ) "*"                       "*"       " "                   
## 15  ( 1 ) "*"                       "*"       " "                   
## 16  ( 1 ) "*"                       "*"       " "                   
## 17  ( 1 ) "*"                       "*"       " "                   
## 18  ( 1 ) "*"                       "*"       " "                   
## 19  ( 1 ) "*"                       "*"       " "                   
## 20  ( 1 ) "*"                       "*"       " "                   
## 21  ( 1 ) "*"                       "*"       "*"                   
## 22  ( 1 ) "*"                       "*"       "*"                   
## 23  ( 1 ) "*"                       "*"       "*"                   
## 24  ( 1 ) "*"                       "*"       "*"                   
## 25  ( 1 ) "*"                       "*"       "*"                   
## 26  ( 1 ) "*"                       "*"       "*"                   
##           birth_rate mortality prevalence pct_non_white
## 1  ( 1 )  " "        "*"       " "        " "          
## 2  ( 1 )  " "        "*"       " "        " "          
## 3  ( 1 )  " "        "*"       " "        " "          
## 4  ( 1 )  " "        "*"       " "        " "          
## 5  ( 1 )  " "        "*"       " "        " "          
## 6  ( 1 )  " "        "*"       " "        " "          
## 7  ( 1 )  " "        "*"       " "        " "          
## 8  ( 1 )  " "        "*"       " "        " "          
## 9  ( 1 )  " "        "*"       " "        " "          
## 10  ( 1 ) " "        "*"       "*"        " "          
## 11  ( 1 ) " "        "*"       "*"        " "          
## 12  ( 1 ) " "        "*"       "*"        " "          
## 13  ( 1 ) " "        "*"       "*"        " "          
## 14  ( 1 ) "*"        "*"       "*"        " "          
## 15  ( 1 ) "*"        "*"       "*"        " "          
## 16  ( 1 ) "*"        "*"       "*"        " "          
## 17  ( 1 ) "*"        "*"       "*"        " "          
## 18  ( 1 ) "*"        "*"       "*"        "*"          
## 19  ( 1 ) "*"        "*"       "*"        "*"          
## 20  ( 1 ) "*"        "*"       "*"        "*"          
## 21  ( 1 ) "*"        "*"       "*"        "*"          
## 22  ( 1 ) "*"        "*"       "*"        "*"          
## 23  ( 1 ) "*"        "*"       "*"        "*"          
## 24  ( 1 ) "*"        "*"       "*"        "*"          
## 25  ( 1 ) "*"        "*"       "*"        "*"          
## 26  ( 1 ) "*"        "*"       "*"        "*"
rs$cp
##  [1] 6403.39877 2125.77633 1555.04810 1121.77180  829.14513  513.27874
##  [7]  355.12225  261.02523  174.84358  122.66120  101.61517   73.62995
## [13]   49.11651   35.36383   23.09511   15.47010   14.82888   15.20583
## [19]   15.70549   16.32869   17.28050   19.01271   21.00207   23.00120
## [25]   25.00034   27.00000
rs$adjr2
##  [1] 0.3116679 0.6284832 0.6707996 0.7029552 0.7247023 0.7481886 0.7599870
##  [8] 0.7670379 0.7735059 0.7774526 0.7790881 0.7812405 0.7831363 0.7842329
## [15] 0.7852199 0.7858618 0.7859840 0.7860305 0.7860679 0.7860961 0.7860998
## [22] 0.7860453 0.7859716 0.7858971 0.7858225 0.7857479

Plot Cp and adj R2 vs. No of parameters

par(mfrow=c(1,2))
plot(1:26, rs$cp, xlab="No of parameters", ylab="Cp Statistic")
abline(0,1)
plot(1:26, rs$adjr2, xlab="No of parameters", ylab="Adj R2")

Find best model of each size

best <- function(model, ...) 
{
  subsets <- regsubsets(formula(model), nvmax = 26, model.frame(model), ...)
  subsets <- with(summary(subsets),
                  cbind(p = as.numeric(rownames(which)), which, rss, rsq, adjr2, cp, bic))
  
  return(subsets)
}  

round(best(canc.fit, nbest = 1), 6)
##     p (Intercept) incidence_rate med_income poverty_percent
## 1   1           1              0          0               0
## 2   2           1              0          0               0
## 3   3           1              0          0               0
## 4   4           1              1          0               0
## 5   5           1              1          0               0
## 6   6           1              1          0               0
## 7   7           1              1          0               0
## 8   8           1              1          0               0
## 9   9           1              1          0               0
## 10 10           1              1          0               0
## 11 11           1              1          0               0
## 12 12           1              1          1               0
## 13 13           1              1          1               0
## 14 14           1              1          1               0
## 15 15           1              1          1               0
## 16 16           1              1          1               0
## 17 17           1              1          1               0
## 18 18           1              1          1               0
## 19 19           1              1          1               0
## 20 20           1              1          1               0
## 21 21           1              1          1               0
## 22 22           1              1          1               0
## 23 23           1              1          1               0
## 24 24           1              1          1               0
## 25 25           1              1          1               1
## 26 26           1              1          1               1
##    study_per_capnone study_per_capvery high median_age_male
## 1                  0                      0               0
## 2                  0                      0               0
## 3                  0                      0               0
## 4                  0                      0               0
## 5                  0                      0               0
## 6                  0                      0               0
## 7                  0                      0               0
## 8                  0                      0               0
## 9                  0                      0               0
## 10                 0                      0               0
## 11                 0                      0               0
## 12                 0                      0               0
## 13                 0                      0               0
## 14                 0                      0               0
## 15                 0                      0               1
## 16                 0                      0               1
## 17                 0                      0               1
## 18                 0                      0               1
## 19                 0                      1               1
## 20                 1                      1               1
## 21                 1                      1               1
## 22                 1                      1               1
## 23                 1                      1               1
## 24                 1                      1               1
## 25                 1                      1               1
## 26                 1                      1               1
##    median_age_female avg_household_size percent_married pct_no_hs18_24
## 1                  0                  0               0              0
## 2                  1                  0               0              0
## 3                  1                  0               0              0
## 4                  1                  0               0              0
## 5                  1                  0               0              0
## 6                  1                  0               0              0
## 7                  1                  0               0              0
## 8                  1                  0               0              0
## 9                  1                  0               0              0
## 10                 1                  0               0              0
## 11                 1                  0               0              0
## 12                 1                  0               0              0
## 13                 1                  0               0              0
## 14                 1                  0               0              0
## 15                 1                  0               0              0
## 16                 1                  0               0              0
## 17                 1                  0               0              0
## 18                 1                  0               0              0
## 19                 1                  0               0              0
## 20                 1                  0               0              0
## 21                 1                  0               0              0
## 22                 1                  0               0              0
## 23                 1                  0               1              0
## 24                 1                  0               1              1
## 25                 1                  0               1              1
## 26                 1                  1               1              1
##    pct_hs18_24 pct_bach_deg18_24 pct_hs25_over pct_bach_deg25_over
## 1            0                 0             0                   0
## 2            0                 0             0                   0
## 3            0                 0             0                   0
## 4            0                 0             0                   0
## 5            1                 0             0                   0
## 6            0                 0             0                   0
## 7            1                 0             0                   0
## 8            1                 0             0                   0
## 9            1                 0             0                   0
## 10           1                 0             0                   0
## 11           1                 0             0                   0
## 12           1                 0             0                   1
## 13           1                 0             0                   1
## 14           1                 0             0                   1
## 15           1                 0             0                   1
## 16           1                 0             0                   1
## 17           1                 0             1                   1
## 18           1                 0             1                   1
## 19           1                 0             1                   1
## 20           1                 0             1                   1
## 21           1                 0             1                   1
## 22           1                 1             1                   1
## 23           1                 1             1                   1
## 24           1                 1             1                   1
## 25           1                 1             1                   1
## 26           1                 1             1                   1
##    pct_employed16_over pct_unemployed16_over pct_private_coverage
## 1                    0                     0                    0
## 2                    0                     0                    0
## 3                    0                     1                    0
## 4                    0                     1                    0
## 5                    0                     1                    0
## 6                    1                     0                    0
## 7                    1                     0                    0
## 8                    1                     0                    0
## 9                    1                     1                    0
## 10                   1                     1                    0
## 11                   1                     1                    0
## 12                   1                     1                    0
## 13                   1                     1                    0
## 14                   1                     1                    0
## 15                   1                     1                    0
## 16                   1                     1                    1
## 17                   1                     1                    1
## 18                   1                     1                    1
## 19                   1                     1                    1
## 20                   1                     1                    1
## 21                   1                     1                    1
## 22                   1                     1                    1
## 23                   1                     1                    1
## 24                   1                     1                    1
## 25                   1                     1                    1
## 26                   1                     1                    1
##    pct_emp_priv_coverage pct_public_coverage pct_public_coverage_alone
## 1                      0                   0                         0
## 2                      0                   0                         0
## 3                      0                   0                         0
## 4                      0                   0                         0
## 5                      0                   0                         0
## 6                      0                   1                         1
## 7                      0                   1                         1
## 8                      1                   1                         1
## 9                      1                   1                         1
## 10                     1                   1                         1
## 11                     1                   1                         1
## 12                     1                   1                         1
## 13                     1                   1                         1
## 14                     1                   1                         1
## 15                     1                   1                         1
## 16                     1                   1                         1
## 17                     1                   1                         1
## 18                     1                   1                         1
## 19                     1                   1                         1
## 20                     1                   1                         1
## 21                     1                   1                         1
## 22                     1                   1                         1
## 23                     1                   1                         1
## 24                     1                   1                         1
## 25                     1                   1                         1
## 26                     1                   1                         1
##    pct_white pct_married_households birth_rate mortality prevalence
## 1          0                      0          0         1          0
## 2          0                      0          0         1          0
## 3          0                      0          0         1          0
## 4          0                      0          0         1          0
## 5          0                      0          0         1          0
## 6          0                      0          0         1          0
## 7          0                      0          0         1          0
## 8          0                      0          0         1          0
## 9          0                      0          0         1          0
## 10         0                      0          0         1          1
## 11         1                      0          0         1          1
## 12         0                      0          0         1          1
## 13         1                      0          0         1          1
## 14         1                      0          1         1          1
## 15         1                      0          1         1          1
## 16         1                      0          1         1          1
## 17         1                      0          1         1          1
## 18         1                      0          1         1          1
## 19         1                      0          1         1          1
## 20         1                      0          1         1          1
## 21         1                      1          1         1          1
## 22         1                      1          1         1          1
## 23         1                      1          1         1          1
## 24         1                      1          1         1          1
## 25         1                      1          1         1          1
## 26         1                      1          1         1          1
##    pct_non_white       rss      rsq    adjr2         cp       bic
## 1              0 1502122.6 0.311906 0.311668 6403.39877 -1066.295
## 2              0  810467.6 0.628740 0.628483 2125.77633 -2844.606
## 3              0  717905.6 0.671141 0.670800 1555.04810 -3187.721
## 4              0  647558.0 0.703366 0.702955 1121.77180 -3478.311
## 5              0  599941.7 0.725178 0.724702  829.14513 -3691.449
## 6              0  548569.4 0.748711 0.748189  513.27874 -3942.634
## 7              0  522685.6 0.760568 0.759987  355.12225 -4074.590
## 8              0  507154.8 0.767682 0.767038  261.02523 -4153.943
## 9              0  492903.2 0.774210 0.773506  174.84358 -4228.490
## 10             0  484146.5 0.778222 0.777453  122.66120 -4272.413
## 11             0  480421.9 0.779928 0.779088  101.61517 -4286.800
## 12             0  475575.8 0.782148 0.781241   73.62995 -4308.180
## 13             0  471290.8 0.784111 0.783136   49.11651 -4326.412
## 14             0  468744.9 0.785277 0.784233   35.36383 -4334.122
## 15             0  466438.8 0.786333 0.785220   23.09511 -4340.429
## 16             0  464883.3 0.787046 0.785862   15.47010 -4342.129
## 17             0  464456.4 0.787241 0.785984   14.82888 -4336.818
## 18             1  464194.1 0.787361 0.786031   15.20583 -4330.482
## 19             1  463951.6 0.787472 0.786068   15.70549 -4324.024
## 20             1  463729.1 0.787574 0.786096   16.32869 -4317.442
## 21             1  463559.7 0.787652 0.786100   17.28050 -4310.529
## 22             1  463516.4 0.787672 0.786045   19.01271 -4302.829
## 23             1  463514.7 0.787673 0.785972   21.00207 -4294.869
## 24             1  463514.6 0.787673 0.785897   23.00120 -4286.899
## 25             1  463514.4 0.787673 0.785823   25.00034 -4278.929
## 26             1  463514.4 0.787673 0.785748   27.00000 -4270.959

17 parameter model minimizes Cp, while 21 parameter model minimizes adjR2.

Fit regression models that minimize Cp and adjR2.

Cp.fit <- lm(target_death_rate ~ incidence_rate + med_income + median_age_male + median_age_female + pct_hs18_24 
             + pct_hs25_over + pct_bach_deg25_over + pct_employed16_over + pct_unemployed16_over 
             + pct_private_coverage + pct_emp_priv_coverage + pct_public_coverage 
             + pct_public_coverage_alone + pct_white + birth_rate + mortality + prevalence, data = cancer_reg)

adjR2.fit <- lm(target_death_rate ~ incidence_rate + med_income + study_per_cap + median_age_male 
                + median_age_female + pct_hs18_24 + pct_hs25_over + pct_bach_deg25_over + pct_employed16_over 
                + pct_unemployed16_over + pct_private_coverage + pct_emp_priv_coverage + pct_public_coverage 
                + pct_public_coverage_alone + pct_white + pct_married_households + birth_rate + mortality 
                + prevalence + pct_non_white, data = cancer_reg)